40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.9-xen-sparse/drivers/xen/blkfront/blkfront.c
40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.9-xen-sparse/drivers/xen/blkfront/block.h
40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.9-xen-sparse/drivers/xen/blkfront/vbd.c
+41a226e0vjAcDXHOnXE5ummcdUD2mg linux-2.6.9-xen-sparse/drivers/xen/blktap/Makefile
+41a226e0VeZA1N8tbU6nvJ3OxUcJmw linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.c
+41a226e1k4J5VMLnrYXDWRqElS49YQ linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap.h
+41a226e1-A_Hy7utS8vJKaXnH_tzfA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c
+41a226e19NoUUTOvs7jumDMRYDIO4Q linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_datapath.c
+41a226e1MNSyWWK5dEVgvSQ5OW0fDA linux-2.6.9-xen-sparse/drivers/xen/blktap/blktap_userdev.c
40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.9-xen-sparse/drivers/xen/console/Makefile
3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.9-xen-sparse/drivers/xen/console/console.c
40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.9-xen-sparse/drivers/xen/evtchn/Makefile
block devices to other guests via a high-performance shared-memory
interface.
+if XEN_BLKDEV_BACKEND
+config XEN_BLKDEV_TAP_BE
+ bool "Block Tap support for backend driver (DANGEROUS)"
+ default n
+ help
+ If you intend to use the block tap driver, the backend domain will
+ not know the domain id of the real frontend, and so will not be able
+ to map its data pages. This modifies the backend to attempt to map
+ from both the tap domain and the real frontend. This presents a
+ security risk, and so should ONLY be used for development
+ with the blktap. This option will be removed as the block drivers are
+ modified to use grant tables.
+endif
+
config XEN_NETDEV_BACKEND
bool "Network-device backend driver"
default y if XEN_PHYSDEV_ACCESS
enabled; then you must say N here.
endif
+config XEN_BLKDEV_TAP
+ bool "Block device tap driver"
+ default n
+ help
+ This driver allows a VM to interact on block device channels
+ to other VMs. Block messages may be passed through or redirected
+ to a character device, allowing device prototyping in application
+ space. Odds are that you want to say N here.
+
+
config XEN_WRITABLE_PAGETABLES
bool
default y
CONFIG_XEN_PRIVILEGED_GUEST=y
CONFIG_XEN_PHYSDEV_ACCESS=y
CONFIG_XEN_BLKDEV_BACKEND=y
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
CONFIG_XEN_NETDEV_BACKEND=y
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_WRITABLE_PAGETABLES=y
CONFIG_XEN_SCRUB_PAGES=y
# CONFIG_XEN_PRIVILEGED_GUEST is not set
# CONFIG_XEN_PHYSDEV_ACCESS is not set
# CONFIG_XEN_BLKDEV_BACKEND is not set
+# CONFIG_XEN_BLKDEV_TAP_BE is not set
# CONFIG_XEN_NETDEV_BACKEND is not set
CONFIG_XEN_BLKDEV_FRONTEND=y
CONFIG_XEN_NETDEV_FRONTEND=y
+# CONFIG_XEN_BLKDEV_TAP is not set
# CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER is not set
CONFIG_XEN_WRITABLE_PAGETABLES=y
CONFIG_XEN_SCRUB_PAGES=y
obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/
+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
static kmem_cache_t *buffer_head_cachep;
#endif
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+/*
+ * If the tap driver is used, we may get pages belonging to either the tap
+ * or (more likely) the real frontend. The backend must specify which domain
+ * a given page belongs to in update_va_mapping though. For the moment,
+ * we pass in the domid of the real frontend in PROBE messages and store
+ * this value in alt_dom. Then on mapping, we try both. This is a Guinness
+ * Book of Records-calibre grim hack, and represents a bit of a security risk.
+ * Grant tables will soon solve the problem though!
+ */
+static domid_t alt_dom = 0;
+#endif
+
static int do_block_io_op(blkif_t *blkif, int max_to_do);
static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
(blkif_last_sect(req->frame_and_sects[0]) != 7) )
goto out;
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ /* Grab the real frontend out of the probe message. */
+ alt_dom = (domid_t)req->frame_and_sects[1];
+#endif
+
if ( HYPERVISOR_update_va_mapping_otherdomain(
MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
(pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
- 0, blkif->domid) )
+ 0, blkif->domid) ) {
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ /* That didn't work. Try alt_dom. */
+ if ( HYPERVISOR_update_va_mapping_otherdomain(
+ MMAP_VADDR(pending_idx, 0) >> PAGE_SHIFT,
+ (pte_t) { (req->frame_and_sects[0] & PAGE_MASK) | __PAGE_KERNEL },
+ 0, alt_dom) )
+ goto out;
+#else
goto out;
-
+#endif
+ }
+
rsp = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0),
PAGE_SIZE / sizeof(vdisk_t));
mcl[i].args[0] = MMAP_VADDR(pending_idx, i) >> PAGE_SHIFT;
mcl[i].args[1] = (phys_seg[i].buffer & PAGE_MASK) | remap_prot;
mcl[i].args[2] = 0;
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ mcl[i].args[3] = (alt_dom != 0) ? alt_dom : blkif->domid;
+#else
mcl[i].args[3] = blkif->domid;
-
+#endif
phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx, i))>>PAGE_SHIFT] =
FOREIGN_FRAME(phys_seg[i].buffer >> PAGE_SHIFT);
}
#endif
blkif_ctrlif_init();
-
+
+#ifdef CONFIG_XEN_BLKDEV_TAP_BE
+ printk(KERN_ALERT "NOTE: Blkif backend is running with tap support on!\n");
+#endif
return 0;
}
{
if ( status->handle != blkif_handle )
{
- WPRINTK(" Invalid blkif: handle=%u", status->handle);
+ WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
+ unexpected(status);
return;
}
--- /dev/null
+
+obj-y := blktap_userdev.o blktap_datapath.o blktap_controlmsg.o blktap.o
+
--- /dev/null
+/******************************************************************************
+ * blktap.c
+ *
+ * XenLinux virtual block-device tap.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ * Based on the original split block driver:
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ *
+ * Note that unlike the split block driver code, this driver has been developed
+ * strictly for Linux 2.6
+ */
+
+#include "blktap.h"
+
+/*
+ * Driver entry point for the block tap.
+ *
+ * Registers blkif_ctrlif_rx() as the receiver for both CMSG_BLKIF_FE and
+ * CMSG_BLKIF_BE control messages, sends a driver-UP notification to the
+ * domain controller for each of the two roles, initialises the active
+ * request tracking and finally brings up the userland channel.
+ *
+ * Always returns 0.
+ */
+int __init xlblk_init(void)
+{
+    ctrl_msg_t cmsg;
+    /*
+     * Initialise the whole status payloads (not just .status) so that no
+     * indeterminate stack bytes are copied into the outgoing messages
+     * should these structures carry members other than 'status'.
+     */
+    blkif_fe_driver_status_t fe_st = { .status = BLKIF_DRIVER_STATUS_UP };
+    blkif_be_driver_status_t be_st = { .status = BLKIF_DRIVER_STATUS_UP };
+
+    printk(KERN_INFO "Initialising Xen block tap device\n");
+
+    DPRINTK(" tap - Backend connection init:\n");
+
+    /* Towards the real backend the tap plays the frontend role. */
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type    = CMSG_BLKIF_FE;
+    cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+    cmsg.length  = sizeof(blkif_fe_driver_status_t);
+    memcpy(cmsg.msg, &fe_st, sizeof(fe_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK(" tap - Frontend connection init:\n");
+
+    active_reqs_init();
+
+    ptfe_blkif.status = DISCONNECTED;
+
+    /* Towards the real frontend the tap plays the backend role. */
+    (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx,
+                                    CALLBACK_IN_BLOCKING_CONTEXT);
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type    = CMSG_BLKIF_BE;
+    cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS;
+    cmsg.length  = sizeof(blkif_be_driver_status_t);
+    memcpy(cmsg.msg, &be_st, sizeof(be_st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+
+    DPRINTK(" tap - Userland channel init:\n");
+
+    blktap_init();
+
+    DPRINTK("Blkif tap device initialized.\n");
+
+    return 0;
+}
+
+/* Suspend hook for the tap driver; the body is empty, so this is a no-op. */
+void blkdev_suspend(void)
+{
+}
+
+/*
+ * Resume hook: re-announces the frontend-role driver as UP to the domain
+ * controller by sending a CMSG_BLKIF_FE_DRIVER_STATUS message.
+ */
+void blkdev_resume(void)
+{
+    ctrl_msg_t cmsg;
+    /*
+     * Initialise the whole payload so that no indeterminate stack bytes are
+     * copied into the message if the structure has members besides 'status'.
+     */
+    blkif_fe_driver_status_t st = { .status = BLKIF_DRIVER_STATUS_UP };
+
+    /* Send a driver-UP notification to the domain controller. */
+    cmsg.type    = CMSG_BLKIF_FE;
+    cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS;
+    cmsg.length  = sizeof(blkif_fe_driver_status_t);
+    memcpy(cmsg.msg, &st, sizeof(st));
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+
+__initcall(xlblk_init);
--- /dev/null
+/*
+ * blktap.h
+ *
+ * Interfaces for the Xen block tap driver.
+ *
+ * (c) 2004, Andrew Warfield, University of Cambridge
+ *
+ */
+
+#ifndef __BLKTAP_H__
+#define __BLKTAP_H__
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <asm-xen/ctrl_if.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/io/blkif.h>
+
+/* -------[ debug / pretty printing ]--------------------------------- */
+
+#if 0
+#define ASSERT(_p) \
+ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
+ __LINE__, __FILE__); *(int*)0=0; }
+#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \
+ __FILE__ , __LINE__ , ## _a )
+#else
+#define ASSERT(_p) ((void)0)
+#define DPRINTK(_f, _a...) ((void)0)
+#endif
+
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
+
+/* -------[ connection / request tracking ]--------------------------- */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#endif
+
+extern spinlock_t blkif_io_lock;
+
+/*
+ * State for one block interface towards a frontend domain. In this driver
+ * a single instance (ptfe_blkif) is used; it is reset and identified in
+ * blkif_ptfe_create() and wired to its ring and event channel in
+ * blkif_ptfe_connect().
+ */
+typedef struct blkif_st {
+ /* Unique identifier for this interface. */
+ domid_t domid;
+ unsigned int handle;
+ /* Physical parameters of the comms window. */
+ unsigned long shmem_frame;
+ unsigned int evtchn;
+ int irq;
+ /* Comms information. */
+ blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
+ BLKIF_RING_IDX blk_req_cons; /* Request consumer. */
+ BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */
+
+ /* Connection state of the interface. */
+ enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+ /*
+ * DISCONNECT response is deferred until pending requests are ack'ed.
+ * We therefore need to store the id from the original request.
+ */ u8 disconnect_rspid;
+ /* NOTE(review): the members below are not touched by the tap code
+ * visible here beyond blk_ring_lock/refcnt initialisation; presumably
+ * inherited from the blkback structure - confirm before relying on them. */
+ struct blkif_st *hash_next;
+ struct list_head blkdev_list;
+ spinlock_t blk_ring_lock;
+ atomic_t refcnt;
+
+ struct work_struct work;
+} blkif_t;
+
+/*
+ * Record for one request in flight through the tap. The interrupt handlers
+ * save the frontend's original request id in 'id' and substitute the
+ * record's index in the active_reqs array while the request is outstanding.
+ */
+typedef struct {
+ blkif_t *blkif;
+ unsigned long id; /* Request id as originally issued by the frontend. */
+ int nr_pages;
+ unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int next_free;
+} active_req_t;
+
+
+/* -------[ block ring structs ]-------------------------------------- */
+
+/* Types of ring. */
+#define BLKIF_REQ_RING_TYPE 1
+#define BLKIF_RSP_RING_TYPE 2
+
+/* generic ring struct. */
+/*
+ * Common prefix of the ring views below: BLKTAP_RING_FULL() inspects 'type'
+ * through this struct and then casts to the concrete view, so 'type' must
+ * stay the first member of every ring view.
+ */
+typedef struct blkif_generic_ring_struct {
+ int type;
+} blkif_generic_ring_t;
+
+/* A requestor's view of a ring. */
+/* Requestor-side bookkeeping for a shared ring (see BLKIF_REQ_RING_INIT). */
+typedef struct blkif_req_ring_struct {
+
+ int type; /* Will be BLKIF_REQ_RING_TYPE */
+ BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */
+ BLKIF_RING_IDX rsp_cons; /* Response consumer index */
+ blkif_ring_t *ring; /* Pointer to shared ring struct */
+
+} blkif_req_ring_t;
+
+#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 }
+
+/* A responder's view of a ring. */
+/* Responder-side bookkeeping for a shared ring. */
+typedef struct blkif_rsp_ring_struct {
+
+ int type; /* Will be BLKIF_RSP_RING_TYPE */
+ BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */
+ BLKIF_RING_IDX req_cons; /* Request consumer index */
+ blkif_ring_t *ring; /* Pointer to shared ring struct */
+
+} blkif_rsp_ring_t;
+
+/* Static initialiser for a blkif_rsp_ring_t; use as '= BLKIF_RSP_RING_INIT'. */
+#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 }
+
+#define RING(a) (blkif_generic_ring_t *)(a)
+
+inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring);
+
+
+/* -------[ interposition -> character device interface ]------------- */
+
+/* /dev/xen/blktap resides at device number major=10, minor=202 */
+#define BLKTAP_MINOR 202
+
+/* size of the extra VMA area to map in attached pages. */
+#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE 1
+#define BLKTAP_IOCTL_KICK_BE 2
+#define BLKTAP_IOCTL_SETMODE 3
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002
+#define BLKTAP_MODE_COPY_FE 0x00000004
+#define BLKTAP_MODE_COPY_BE 0x00000008
+#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010
+#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020
+
+#define BLKTAP_MODE_INTERPOSE \
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+#define BLKTAP_MODE_COPY_BOTH \
+ (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE)
+
+#define BLKTAP_MODE_COPY_BOTH_PAGES \
+ (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES)
+
+/*
+ * Check a mode word passed with BLKTAP_IOCTL_SETMODE.
+ *
+ * Accepted values are: passthrough, either single intercept mode, full
+ * interposition, or one of the copy modes (FE, BE or both) optionally
+ * combined with the matching *_PAGES flag(s).
+ *
+ * Returns 1 when 'arg' is an accepted mode and 0 otherwise.
+ */
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+    /* Modes that must match exactly. */
+    switch ( arg )
+    {
+    case BLKTAP_MODE_PASSTHROUGH:
+    case BLKTAP_MODE_INTERCEPT_FE:
+    case BLKTAP_MODE_INTERCEPT_BE:
+    case BLKTAP_MODE_INTERPOSE:
+        return 1;
+    default:
+        break;
+    }
+
+    /* Copy modes: the corresponding page-copy flag is optional. */
+    if ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE )
+        return 1;
+    if ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE )
+        return 1;
+    if ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH )
+        return 1;
+
+    return 0;
+}
+
+
+
+/* -------[ Mappings to User VMA ]------------------------------------ */
+#define MAX_PENDING_REQS 64
+#define BATCH_PER_DOMAIN 16
+extern struct vm_area_struct *blktap_vma;
+
+/* The following are from blkback.c and should probably be put in a
+ * header and included from there.
+ * The mmap area described here is where attached data pages will be mapped.
+ */
+
+extern unsigned long mmap_vstart;
+#define MMAP_PAGES_PER_REQUEST \
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
+#define MMAP_PAGES \
+ (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
+#define MMAP_VADDR(_req,_seg) \
+ (mmap_vstart + \
+ ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
+ ((_seg) * PAGE_SIZE))
+
+/* immediately before the mmap area, we have a bunch of pages reserved
+ * for shared memory rings.
+ */
+
+#define RING_PAGES 128
+extern unsigned long rings_vstart;
+
+/* -------[ Here be globals ]----------------------------------------- */
+
+extern unsigned long blktap_mode;
+
+
+/* blkif struct, containing ring to FE domain */
+extern blkif_t ptfe_blkif;
+
+/* Connection to a single backend domain. */
+extern blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */
+extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
+extern BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */
+
+/* Rings up to user space. */
+extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
+extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
+
+/* Event channel to backend domain. */
+extern unsigned int blkif_ptbe_evtchn;
+
+/* User ring status... this will soon vanish into a ring struct. */
+extern unsigned long blktap_ring_ok;
+
+/* -------[ ...and function prototypes. ]----------------------------- */
+
+/* init function for character device interface. */
+int blktap_init(void);
+
+/* interfaces to the char driver, passing messages to and from apps. */
+void blktap_kick_user(void);
+int blktap_write_to_ring(blkif_request_t *req);
+
+
+/* user ring access functions: */
+int blktap_write_fe_ring(blkif_request_t *req);
+int blktap_write_be_ring(blkif_response_t *rsp);
+int blktap_read_fe_ring(void);
+int blktap_read_be_ring(void);
+
+/* and the helpers they call: */
+inline int write_resp_to_fe_ring(blkif_response_t *rsp);
+inline void kick_fe_domain(void);
+
+inline int write_req_to_be_ring(blkif_request_t *req);
+inline void kick_be_domain(void);
+
+/* Interrupt handlers. */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs);
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Control message receiver. */
+extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id);
+
+#endif /* __BLKTAP_H__ */
--- /dev/null
+/******************************************************************************
+ * blktap_controlmsg.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interfaces to the frontend and backend drivers.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include "blktap.h"
+
+#define BLKIF_STATE_CLOSED 0
+#define BLKIF_STATE_DISCONNECTED 1
+#define BLKIF_STATE_CONNECTED 2
+
+/* Printable names for the tap's own BLKIF_STATE_* values (for diagnostics). */
+static const char * const blkif_state_name[] = {
+    [BLKIF_STATE_CLOSED]       = "closed",
+    [BLKIF_STATE_DISCONNECTED] = "disconnected",
+    [BLKIF_STATE_CONNECTED]    = "connected",
+};
+
+/* Printable names for the BLKIF_INTERFACE_STATUS_* values (for diagnostics). */
+static const char * const blkif_status_name[] = {
+    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
+    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
+    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
+};
+static unsigned int blkif_pt_state = BLKIF_STATE_CLOSED;
+static unsigned blkif_ptbe_irq;
+unsigned int blkif_ptbe_evtchn;
+
+/*-----[ Control Messages to/from Frontend VMs ]--------------------------*/
+
+
+/*
+ * Handle a BE_CREATE control message: reset the single frontend-facing
+ * interface (ptfe_blkif), record the creating domain and handle, and leave
+ * it in the DISCONNECTED state. The reply status is always OKAY.
+ */
+void blkif_ptfe_create(blkif_be_create_t *create)
+{
+    /* Only one interface exists; a lookup would go here if a hash returns. */
+    blkif_t *tap_if = &ptfe_blkif;
+
+    /* May want to store info on the connecting domain here. */
+
+    DPRINTK("PT got BE_CREATE\n");
+
+    /* blkif struct init code from blkback.c */
+    memset(tap_if, 0, sizeof(*tap_if));
+    tap_if->domid  = create->domid;
+    tap_if->handle = create->blkif_handle;
+    tap_if->status = DISCONNECTED;
+    spin_lock_init(&tap_if->blk_ring_lock);
+    atomic_set(&tap_if->refcnt, 0);
+
+    create->status = BLKIF_BE_STATUS_OKAY;
+}
+
+
+/*
+ * Handle a BE_DESTROY control message. Nothing is torn down at present;
+ * the request is simply acknowledged with an OKAY status.
+ */
+void blkif_ptfe_destroy(blkif_be_destroy_t *destroy)
+{
+    DPRINTK("PT got BE_DESTROY\n");
+
+    /* Anything set up by blkif_ptfe_create() would be cleared here. */
+    destroy->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*
+ * Handle a BE_CONNECT control message: map the frontend's shared ring page
+ * into a freshly reserved kernel virtual area, bind the supplied event
+ * channel to an IRQ and install blkif_ptfe_int() as its handler.
+ *
+ * The outcome is reported through connect->status:
+ *   OUT_OF_MEMORY        - no virtual area, or the remap returned -ENOMEM
+ *   MAPPING_ERROR        - the remap returned -EFAULT
+ *   ERROR                - any other remap failure
+ *   INTERFACE_CONNECTED  - the interface was not in the DISCONNECTED state
+ *   OKAY                 - the interface is now CONNECTED
+ */
+void blkif_ptfe_connect(blkif_be_connect_t *connect)
+{
+    blkif_t          *tap_if = &ptfe_blkif; /* single interface for now */
+    domid_t           fe_dom = connect->domid;
+    unsigned int      port   = connect->evtchn;
+    unsigned long     frame  = connect->shmem_frame;
+    struct vm_struct *area;
+    pgprot_t          prot;
+    int               rc;
+
+    DPRINTK("PT got BE_CONNECT\n");
+
+    area = get_vm_area(PAGE_SIZE, VM_IOREMAP);
+    if ( area == NULL )
+    {
+        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+        return;
+    }
+
+    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
+    rc = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(area->addr),
+                                 frame<<PAGE_SHIFT, PAGE_SIZE,
+                                 prot, fe_dom);
+    if ( rc != 0 )
+    {
+        WPRINTK("BE_CONNECT: error! (%d)\n", rc);
+        switch ( rc )
+        {
+        case -ENOMEM:
+            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
+            break;
+        case -EFAULT:
+            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
+            WPRINTK("BE_CONNECT: MAPPING error!\n");
+            break;
+        default:
+            connect->status = BLKIF_BE_STATUS_ERROR;
+            break;
+        }
+        goto release_area;
+    }
+
+    if ( tap_if->status != DISCONNECTED )
+    {
+        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
+        goto release_area;
+    }
+
+    tap_if->evtchn        = port;
+    tap_if->irq           = bind_evtchn_to_irq(port);
+    tap_if->shmem_frame   = frame;
+    tap_if->blk_ring_base = (blkif_ring_t *)area->addr;
+    tap_if->status        = CONNECTED;
+    /*blkif_get(blkif);*/
+
+    request_irq(tap_if->irq, blkif_ptfe_int, 0, "blkif-pt-backend", tap_if);
+
+    connect->status = BLKIF_BE_STATUS_OKAY;
+    return;
+
+ release_area:
+    /* Common failure exit: give the reserved virtual area back. */
+    vfree(area->addr);
+}
+
+/*
+ * Handle a BE_DISCONNECT control message.
+ *
+ * The passthrough is deliberately NOT marked disconnected: the tap only
+ * acts as a pipe and leaves things such as recovery to the real endpoints.
+ * The request is acknowledged with an OKAY status.
+ */
+void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect)
+{
+    DPRINTK("PT got BE_DISCONNECT\n");
+
+    disconnect->status = BLKIF_BE_STATUS_OKAY;
+}
+
+/*-----[ Control Messages to/from Backend VM ]----------------------------*/
+
+/* Tell the controller to bring up the interface. */
+/*
+ * Ask the domain controller to bring up the interface to the backend,
+ * advertising handle 0 and the machine frame of the backend ring page.
+ */
+static void blkif_ptbe_send_interface_connect(void)
+{
+    ctrl_msg_t cmsg = {
+        .type    = CMSG_BLKIF_FE,
+        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
+        .length  = sizeof(blkif_fe_interface_connect_t),
+    };
+    /* The payload is built in place inside the message buffer. */
+    blkif_fe_interface_connect_t *payload =
+        (blkif_fe_interface_connect_t *)(void *)cmsg.msg;
+
+    payload->handle      = 0;
+    payload->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT;
+
+    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
+}
+
+/* Placeholder for closing the connection to the backend; currently a no-op. */
+static void blkif_ptbe_close(void)
+{
+}
+
+/* Move from CLOSED to DISCONNECTED state. */
+/*
+ * Move from CLOSED to DISCONNECTED state: allocate the page used as the
+ * shared ring towards the backend, reset all ring indices and ask the
+ * controller to connect the interface.
+ *
+ * If no page can be allocated a warning is logged and the state is left
+ * unchanged instead of dereferencing a NULL ring pointer.
+ */
+static void blkif_ptbe_disconnect(void)
+{
+    unsigned long page = __get_free_page(GFP_KERNEL);
+
+    if ( page == 0 )
+    {
+        WPRINTK("out of memory allocating the backend ring page\n");
+        return;
+    }
+
+    blk_ptbe_ring = (blkif_ring_t *)page;
+    blk_ptbe_ring->req_prod  = 0;
+    blk_ptbe_ring->resp_prod = 0;
+    ptbe_resp_cons = 0;
+    ptbe_req_prod  = 0;
+
+    blkif_pt_state = BLKIF_STATE_DISCONNECTED;
+    DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
+    blkif_ptbe_send_interface_connect();
+}
+
+/*
+ * Complete the connection to the backend: bind the event channel named in
+ * the status message to an IRQ and install blkif_ptbe_int() on it. On
+ * success the tap moves to the CONNECTED state; if request_irq() fails a
+ * warning is logged and the state is left untouched.
+ */
+static void blkif_ptbe_connect(blkif_fe_interface_status_t *status)
+{
+    int rc;
+
+    blkif_ptbe_evtchn = status->evtchn;
+    blkif_ptbe_irq    = bind_evtchn_to_irq(blkif_ptbe_evtchn);
+
+    rc = request_irq(blkif_ptbe_irq, blkif_ptbe_int,
+                     SA_SAMPLE_RANDOM, "blkif", NULL);
+    if ( rc != 0 )
+    {
+        WPRINTK("blkfront request_irq failed (%d)\n", rc);
+        return;
+    }
+
+    /* Transition to connected in case we need to do a partition probe
+     * on a whole disk. */
+    blkif_pt_state = BLKIF_STATE_CONNECTED;
+}
+
+/*
+ * Log a warning naming an interface status that was not expected in the
+ * tap's current connection state.
+ */
+static void unexpected(blkif_fe_interface_status_t *status)
+{
+    WPRINTK(" TAP: Unexpected blkif status %s in state %s\n",
+            blkif_status_name[status->status],
+            blkif_state_name[blkif_pt_state]);
+}
+
+/*
+ * State machine for interface status messages concerning the connection to
+ * the backend. Only handle 0 is supported. For each reported status the
+ * action depends on the tap's state at entry; combinations that are not
+ * part of the normal bring-up sequence are logged through unexpected().
+ */
+static void blkif_ptbe_status(
+    blkif_fe_interface_status_t *status)
+{
+    unsigned int state;
+
+    if ( status->handle != 0 )
+    {
+        DPRINTK("Status change on unsupported blkif %d\n",
+                status->handle);
+        return;
+    }
+
+    DPRINTK("ptbe_status: got %s\n", blkif_status_name[status->status]);
+
+    /* Decisions are taken on the state at entry, as the handlers below may
+     * themselves change blkif_pt_state. */
+    state = blkif_pt_state;
+
+    switch ( status->status )
+    {
+    case BLKIF_INTERFACE_STATUS_CLOSED:
+        if ( state == BLKIF_STATE_CLOSED )
+        {
+            unexpected(status);
+        }
+        else if ( (state == BLKIF_STATE_DISCONNECTED) ||
+                  (state == BLKIF_STATE_CONNECTED) )
+        {
+            unexpected(status);
+            blkif_ptbe_close();
+        }
+        break;
+
+    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
+        if ( state == BLKIF_STATE_CLOSED )
+        {
+            blkif_ptbe_disconnect();
+        }
+        else if ( (state == BLKIF_STATE_DISCONNECTED) ||
+                  (state == BLKIF_STATE_CONNECTED) )
+        {
+            printk(KERN_ALERT "*** add recovery code to the tap driver. ***\n");
+            unexpected(status);
+        }
+        break;
+
+    case BLKIF_INTERFACE_STATUS_CONNECTED:
+        if ( state == BLKIF_STATE_CLOSED )
+        {
+            unexpected(status);
+            blkif_ptbe_disconnect();
+            blkif_ptbe_connect(status);
+        }
+        else if ( state == BLKIF_STATE_DISCONNECTED )
+        {
+            blkif_ptbe_connect(status);
+        }
+        else if ( state == BLKIF_STATE_CONNECTED )
+        {
+            unexpected(status);
+            blkif_ptbe_connect(status);
+        }
+        break;
+
+    case BLKIF_INTERFACE_STATUS_CHANGED:
+        /* vbd_update() would go here for the CONNECTED state, but the tap
+         * doesn't really get state changes, so every state is unexpected. */
+        if ( (state == BLKIF_STATE_CLOSED) ||
+             (state == BLKIF_STATE_DISCONNECTED) ||
+             (state == BLKIF_STATE_CONNECTED) )
+        {
+            unexpected(status);
+        }
+        break;
+
+    default:
+        DPRINTK("Status change to unknown value %d\n", status->status);
+        break;
+    }
+}
+
+/*-----[ All control messages enter here: ]-------------------------------*/
+
+/*
+ * Receiver for all block-interface control messages (registered for both
+ * CMSG_BLKIF_FE and CMSG_BLKIF_BE in xlblk_init()).
+ *
+ * Each recognised subtype is length-checked and dispatched to its handler,
+ * after which the (possibly updated) message is sent back as the response.
+ * An unrecognised subtype or a length mismatch yields a zero-length
+ * response. A message of any other type is echoed back unchanged.
+ */
+void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
+{
+    switch ( msg->type )
+    {
+    case CMSG_BLKIF_FE:
+
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_FE_INTERFACE_STATUS:
+            if ( msg->length != sizeof(blkif_fe_interface_status_t) )
+                goto parse_error;
+            blkif_ptbe_status((blkif_fe_interface_status_t *) &msg->msg[0]);
+            break;
+
+        default:
+            goto parse_error;
+        }
+        /*
+         * Must not fall through: the BE subtype switch below would
+         * otherwise re-interpret an already handled FE message.
+         */
+        break;
+
+    case CMSG_BLKIF_BE:
+
+        switch ( msg->subtype )
+        {
+        case CMSG_BLKIF_BE_CREATE:
+            if ( msg->length != sizeof(blkif_be_create_t) )
+                goto parse_error;
+            blkif_ptfe_create((blkif_be_create_t *)&msg->msg[0]);
+            break;
+        case CMSG_BLKIF_BE_DESTROY:
+            if ( msg->length != sizeof(blkif_be_destroy_t) )
+                goto parse_error;
+            blkif_ptfe_destroy((blkif_be_destroy_t *)&msg->msg[0]);
+            break;
+        case CMSG_BLKIF_BE_CONNECT:
+            if ( msg->length != sizeof(blkif_be_connect_t) )
+                goto parse_error;
+            blkif_ptfe_connect((blkif_be_connect_t *)&msg->msg[0]);
+            break;
+        case CMSG_BLKIF_BE_DISCONNECT:
+            if ( msg->length != sizeof(blkif_be_disconnect_t) )
+                goto parse_error;
+            blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]);
+            break;
+
+        /* We just ignore anything to do with vbds for now. */
+
+        case CMSG_BLKIF_BE_VBD_CREATE:
+            DPRINTK("PT got VBD_CREATE\n");
+            ((blkif_be_vbd_create_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_DESTROY:
+            DPRINTK("PT got VBD_DESTROY\n");
+            ((blkif_be_vbd_destroy_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_GROW:
+            DPRINTK("PT got VBD_GROW\n");
+            ((blkif_be_vbd_grow_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        case CMSG_BLKIF_BE_VBD_SHRINK:
+            DPRINTK("PT got VBD_SHRINK\n");
+            ((blkif_be_vbd_shrink_t *)&msg->msg[0])->status
+                = BLKIF_BE_STATUS_OKAY;
+            break;
+        default:
+            goto parse_error;
+        }
+        break;
+
+    default:
+        /* Other message types are acknowledged unchanged. */
+        break;
+    }
+
+    ctrl_if_send_response(msg);
+    return;
+
+ parse_error:
+    msg->length = 0;
+    ctrl_if_send_response(msg);
+}
--- /dev/null
+/******************************************************************************
+ * blktap_datapath.c
+ *
+ * XenLinux virtual block-device tap.
+ * Block request routing data path.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include "blktap.h"
+
+/*-----[ The data paths ]-------------------------------------------------*/
+
+/* Connections to the frontend domains.*/
+blkif_t ptfe_blkif;
+
+/* Connection to a single backend domain. */
+blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */
+BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */
+BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */
+
+/* Rings up to user space. */
+blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT;
+blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT;
+
+/*-----[ Ring helpers ]---------------------------------------------------*/
+
+/*
+ * Report whether a ring has no room left.
+ *
+ * Only request rings are actually checked (full when the private request
+ * producer is BLKIF_RING_SIZE ahead of the response consumer); for any
+ * other ring type the response path is assumed to always have room.
+ *
+ * Returns non-zero when the ring is full, 0 otherwise.
+ */
+inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring)
+{
+    blkif_req_ring_t *req_view;
+
+    /* for now assume that there is always room in the response path. */
+    if ( ring->type != BLKIF_REQ_RING_TYPE )
+        return 0;
+
+    req_view = (blkif_req_ring_t *)ring;
+    return ( ( req_view->req_prod - req_view->rsp_cons ) == BLKIF_RING_SIZE );
+}
+
+/*-----[ Tracking active requests ]---------------------------------------*/
+
+/* this must be the same as MAX_PENDING_REQS in blkback.c */
+#define MAX_ACTIVE_REQS 64
+
+active_req_t active_reqs[MAX_ACTIVE_REQS];
+unsigned char active_req_ring[MAX_ACTIVE_REQS];
+spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED;
+typedef unsigned int ACTIVE_RING_IDX;
+ACTIVE_RING_IDX active_prod, active_cons;
+#define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1))
+/* Index of an active request record within active_reqs[]. */
+#define ACTIVE_IDX(_ar) ((_ar) - active_reqs)
+
+/*
+ * Take the next free active-request record.
+ *
+ * Free records are tracked as indices stored in active_req_ring; the slot
+ * at active_cons names the record to hand out. Indexing active_reqs
+ * directly with active_cons would ignore the indices written back by
+ * free_active_req() and could hand out a record that is still in flight
+ * once requests complete out of order.
+ *
+ * The caller must ensure that a free record exists
+ * (active_cons != active_prod); this is only asserted, not handled.
+ */
+inline active_req_t *get_active_req(void)
+{
+    ACTIVE_RING_IDX slot;
+
+    ASSERT(active_cons != active_prod);
+
+    slot = MASK_ACTIVE_IDX(active_cons++);
+    return &active_reqs[active_req_ring[slot]];
+}
+
+/*
+ * Return an active-request record to the free ring: its index is stored at
+ * the producer slot of active_req_ring and the producer is advanced, all
+ * under active_req_lock with interrupts disabled.
+ */
+inline void free_active_req(active_req_t *ar)
+{
+    unsigned long irq_state;
+    unsigned char rec_idx = ACTIVE_IDX(ar);
+
+    spin_lock_irqsave(&active_req_lock, irq_state);
+    active_req_ring[MASK_ACTIVE_IDX(active_prod)] = rec_idx;
+    active_prod++;
+    spin_unlock_irqrestore(&active_req_lock, irq_state);
+}
+
+/*
+ * Reset the active-request tracking: clear every record, list every record
+ * index as free in active_req_ring and set the consumer/producer indices so
+ * that all MAX_ACTIVE_REQS records are available.
+ */
+inline void active_reqs_init(void)
+{
+    ACTIVE_RING_IDX slot = 0;
+
+    memset(active_reqs, 0, sizeof(active_reqs));
+
+    while ( slot < MAX_ACTIVE_REQS )
+    {
+        active_req_ring[slot] = slot;
+        slot++;
+    }
+
+    active_cons = 0;
+    active_prod = MAX_ACTIVE_REQS;
+}
+
+/*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/
+
+/*
+ * Interrupt handler for the event channel from the real frontend.
+ *
+ * Consumes every pending request on the frontend ring. Each request gets
+ * an active-request record; the frontend's id is saved there and replaced
+ * in the request by the record's index. Depending on blktap_mode the
+ * request is then written to the user-space FE ring and/or copied onto the
+ * ring to the backend, and the corresponding consumers are notified.
+ *
+ * Runs under blkif_io_lock with interrupts disabled. Always returns
+ * IRQ_HANDLED.
+ */
+irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+    /* we have pending messages from the real frontend. */
+
+    blkif_request_t *req_s, *req_d;
+    BLKIF_RING_IDX fe_rp;
+    unsigned long flags;
+    int notify;
+    unsigned long i;
+    active_req_t *ar;
+
+    DPRINTK("PT got FE interrupt.\n");
+
+    /* lock both rings */
+    spin_lock_irqsave(&blkif_io_lock, flags);
+
+    /* While there are REQUESTS on FERing: */
+    fe_rp = ptfe_blkif.blk_ring_base->req_prod;
+    rmb(); /* read the producer index before reading the requests */
+    notify = (ptfe_blkif.blk_req_cons != fe_rp);
+
+    for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) {
+
+        /* Get the next request */
+        req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req;
+
+        /* This is a new request:
+         * Assign an active request record, and remap the id.
+         */
+        ar = get_active_req();
+        ar->id = req_s->id;
+        req_s->id = ACTIVE_IDX(ar);
+        DPRINTK("%3lu < %3lu\n", req_s->id, ar->id);
+
+        /* FE -> BE interposition point is here. */
+
+        /* ------------------------------------------------------------- */
+        /* BLKIF_OP_PROBE_HACK:                                          */
+        /* Until we have grant tables, we need to allow the backend to   */
+        /* map pages that are either from this domain, or more commonly  */
+        /* from the real front end. We achieve this in a terrible way,   */
+        /* by passing the front end's domid along with PROBE messages.   */
+        /* Once grant tables appear, this should all go away.            */
+
+        if (req_s->operation == BLKIF_OP_PROBE) {
+            DPRINTK("Adding FE domid to PROBE request.\n");
+            /* Plain assignment: the cast-as-lvalue form is a GCC extension
+             * that newer compilers reject. */
+            req_s->frame_and_sects[1] = ptfe_blkif.domid;
+        }
+
+        /* ------------------------------------------------------------- */
+
+        /* If we are in MODE_INTERCEPT_FE or MODE_COPY_FE: */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+            /* Copy the request message to UFERing */
+            /* In MODE_INTERCEPT_FE, map attached pages into the app vma */
+            /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */
+
+            /* XXX: mapping/copying of attached pages is still not done! */
+
+            DPRINTK("req->UFERing\n");
+            blktap_write_fe_ring(req_s);
+        }
+
+        /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+
+            /* (be included to prevent noise from the fe when it's off) */
+            /* copy the request message to the BERing */
+
+            DPRINTK("blktap: FERing[%u] -> BERing[%u]\n",
+                    (unsigned)MASK_BLKIF_IDX(i),
+                    (unsigned)MASK_BLKIF_IDX(ptbe_req_prod));
+
+            req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
+
+            memcpy(req_d, req_s, sizeof(blkif_request_t));
+
+            ptbe_req_prod++;
+        }
+    }
+
+    ptfe_blkif.blk_req_cons = i;
+
+    /* If we have forwarded any requests, notify the appropriate ends. */
+    if (notify) {
+
+        /* we have sent stuff to the be, notify it. */
+        if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+               (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) {
+            wmb(); /* publish the requests before the producer index */
+            blk_ptbe_ring->req_prod = ptbe_req_prod;
+
+            notify_via_evtchn(blkif_ptbe_evtchn);
+            DPRINTK(" -- and notified.\n");
+        }
+
+        /* we sent stuff to the app, notify it. */
+        if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) ||
+             (blktap_mode & BLKTAP_MODE_COPY_FE) ) {
+
+            blktap_kick_user();
+        }
+    }
+
+    /* unlock rings */
+    spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+    return IRQ_HANDLED;
+}
+
+/*
+ * Copy one request into the next free slot of the ring to the backend and
+ * advance the private request producer. The shared producer index is not
+ * updated here; see kick_be_domain(). Always returns 0.
+ */
+inline int write_req_to_be_ring(blkif_request_t *req)
+{
+    blkif_request_t *slot =
+        &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req;
+
+    memcpy(slot, req, sizeof(blkif_request_t));
+    ptbe_req_prod++;
+
+    return 0;
+}
+
+/*
+ * Publish the private request producer on the shared backend ring and
+ * notify the backend domain through its event channel.
+ */
+inline void kick_be_domain(void)
+{
+    /* Queued requests must be visible before the producer index moves. */
+    wmb();
+    blk_ptbe_ring->req_prod = ptbe_req_prod;
+
+    notify_via_evtchn(blkif_ptbe_evtchn);
+}
+
+/*-----[ Data to/from Backend (server) VM ]------------------------------*/
+
+
+/*
+ * Interrupt handler for the event channel from the backend.
+ *
+ * Consumes every pending response on the backend ring. Depending on
+ * blktap_mode each response is written to the user-space BE ring and/or
+ * forwarded to the frontend ring; when forwarding, the response id is
+ * mapped back to the frontend's original id and the active-request record
+ * is released. Runs under blkif_io_lock with interrupts disabled and
+ * always returns IRQ_HANDLED.
+ */
+irqreturn_t blkif_ptbe_int(int irq, void *dev_id,
+ struct pt_regs *ptregs)
+{
+ blkif_response_t *resp_s, *resp_d;
+ BLKIF_RING_IDX be_rp;
+ unsigned long flags;
+ int notify;
+ unsigned long i;
+ active_req_t *ar;
+
+ DPRINTK("PT got BE interrupt.\n");
+
+ /* lock both rings */
+ spin_lock_irqsave(&blkif_io_lock, flags);
+
+ /* While there are RESPONSES on BERing: */
+ be_rp = blk_ptbe_ring->resp_prod;
+ rmb(); /* read the producer index before reading the responses */
+ notify = (ptbe_resp_cons != be_rp);
+
+ for ( i = ptbe_resp_cons; i != be_rp; i++ )
+ {
+ /* BE -> FE interposition point is here. */
+
+ /* Get the next response */
+ resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp;
+
+
+ /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+ /* Copy the response message to UBERing */
+ /* In MODE_INTERCEPT_BE, map attached pages into the app vma */
+ /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */
+
+ /* XXX: copy/map the attached page! */
+
+ DPRINTK("rsp->UBERing\n");
+ blktap_write_be_ring(resp_s);
+
+ }
+
+ /* If we are NOT in MODE_INTERCEPT_BE or MODE_INTERCEPT_FE: */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+
+ /* (fe included to prevent random interference from the BE) */
+ /* Copy the response message to FERing */
+
+ DPRINTK("blktap: BERing[%u] -> FERing[%u]\n",
+ (unsigned) MASK_BLKIF_IDX(i),
+ (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod));
+
+ /* remap id, and free the active req. blkif lookup goes here too.*/
+ /* NOTE(review): resp_s->id comes from the shared ring and is used
+ * as an array index without a range check - confirm the backend is
+ * trusted or add validation. */
+ ar = &active_reqs[resp_s->id];
+ DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id);
+ resp_s->id = ar->id;
+ free_active_req(ar);
+
+ resp_d = &ptfe_blkif.blk_ring_base->ring[
+ MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp;
+
+ memcpy(resp_d, resp_s, sizeof(blkif_response_t));
+
+ ptfe_blkif.blk_resp_prod++;
+
+ }
+ }
+
+ ptbe_resp_cons = i;
+
+ /* If we have forwarded any responses, notify the appropriate domains. */
+ if (notify) {
+
+ /* we have sent stuff to the fe. notify it. */
+ if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) {
+ wmb(); /* publish the responses before the producer index */
+ ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
+
+ notify_via_evtchn(ptfe_blkif.evtchn);
+ DPRINTK(" -- and notified.\n");
+ }
+
+ /* we sent stuff to the app, notify it. */
+ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) ||
+ (blktap_mode & BLKTAP_MODE_COPY_BE) ) {
+
+ blktap_kick_user();
+ }
+ }
+
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ * write_resp_to_fe_ring: forward one response to the frontend ring.
+ *
+ * rsp->id is used as an index into active_reqs[]; the response id is then
+ * overwritten in place with the id stored in that entry, the entry is
+ * released, and the response is copied into the frontend slot selected by
+ * the private producer index, which is advanced.  The shared resp_prod is
+ * not updated here (kick_fe_domain() stores it).  Always returns 0.
+ *
+ * NOTE(review): rsp->id is not range-checked before it indexes
+ * active_reqs[] -- confirm callers only pass trusted ids.
+ */
+inline int write_resp_to_fe_ring(blkif_response_t *rsp)
+{
+    active_req_t *pending = &active_reqs[rsp->id];
+
+    DPRINTK("%3lu > %3lu\n", rsp->id, pending->id);
+
+    /* Put back the recorded id, then retire the bookkeeping entry. */
+    rsp->id = pending->id;
+    free_active_req(pending);
+
+    memcpy(&ptfe_blkif.blk_ring_base->ring[
+               MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp,
+           rsp, sizeof(*rsp));
+    ptfe_blkif.blk_resp_prod++;
+
+    return 0;
+}
+
+/*
+ * kick_fe_domain: store the private response producer index into the
+ * shared frontend ring and signal the frontend's event channel.
+ */
+inline void kick_fe_domain(void)
+{
+    /* Write barrier first, so earlier stores precede the index update. */
+    wmb();
+    ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod;
+
+    notify_via_evtchn(ptfe_blkif.evtchn);
+}
+
+/*
+ * flush_requests: make queued requests visible on the backend ring and
+ * notify the backend.  This is the same barrier / index store / event
+ * channel sequence as kick_be_domain(), so simply delegate to it.
+ */
+static inline void flush_requests(void)
+{
+    kick_be_domain();
+}
+
+/*-----[ Data to/from user space ]----------------------------------------*/
+
+
+/*
+ * blktap_write_fe_ring: pass a request from the real frontend domain's
+ * blkif ring to the character device.
+ *
+ * The request is copied into the next slot of the user-visible fe_ring and
+ * the frame of every attached segment is mapped into the application's VMA
+ * at MMAP_VADDR(req->id, segment).  The private producer index is advanced
+ * only when all of that succeeded; blktap_poll() later publishes it.
+ *
+ * Returns 0 in every case.  If the rings are not mapped yet, the ring is
+ * full, or a remap fails, the request is not queued.
+ */
+int blktap_write_fe_ring(blkif_request_t *req)
+{
+    blkif_request_t *target;
+    int error, i;
+
+    if ( !blktap_ring_ok ) {
+        DPRINTK("blktap: fe_ring not ready for a request!\n");
+        return 0;
+    }
+
+    if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) {
+        DPRINTK("blktap: fe_ring is full, can't add.\n");
+        return 0;
+    }
+
+    target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req;
+    memcpy(target, req, sizeof(*req));
+
+    /* maybe move this stuff out into a separate func ----------------- */
+
+    /*
+     * For now, map each attached page into a fixed position in the vma.
+     * XXX: make this map to a free page.
+     */
+
+    /* Attempt to map the foreign pages directly in to the application */
+    for ( i = 0; i < target->nr_segments; i++ ) {
+
+        /*
+         * Each segment must map its own frame: index frame_and_sects[]
+         * with the loop counter, not with a constant 0.
+         */
+        error = direct_remap_area_pages(blktap_vma->vm_mm,
+                                        MMAP_VADDR(req->id, i),
+                                        target->frame_and_sects[i] & PAGE_MASK,
+                                        PAGE_SIZE,
+                                        blktap_vma->vm_page_prot,
+                                        ptfe_blkif.domid);
+        if ( error != 0 ) {
+            printk(KERN_INFO "remapping attached page failed! (%d)\n", error);
+            return 0;
+        }
+    }
+
+    /* TODO: rewrite the frame addresses in the copied message so that */
+    /* they refer to the mapped pages, preserving the sector bits.     */
+    /* ---------------------------------------------------------------- */
+
+    fe_ring.req_prod++;
+
+    return 0;
+}
+
+/*
+ * blktap_write_be_ring: pass a response from the real backend domain's
+ * blkif ring to the character device.
+ *
+ * If the rings have been mapped and be_ring has room, the response is
+ * copied into the next be_ring slot and the private producer index is
+ * advanced; otherwise the response is not queued.  Returns 0 either way.
+ */
+int blktap_write_be_ring(blkif_response_t *rsp)
+{
+    if ( !blktap_ring_ok ) {
+        DPRINTK("blktap: be_ring not ready for a request!\n");
+    } else if ( BLKTAP_RING_FULL(RING(&be_ring)) ) {
+        DPRINTK("blktap: be_ring is full, can't add.\n");
+    } else {
+        blkif_response_t *slot =
+            &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp;
+
+        memcpy(slot, rsp, sizeof(*slot));
+
+        /* XXX: map attached pages and fix up addresses in the copy. */
+
+        be_ring.rsp_prod++;
+    }
+
+    return 0;
+}
+
+/*
+ * blktap_read_fe_ring: consume responses that the application placed on
+ * the user-space fe_ring.
+ *
+ * Only acts when BLKTAP_MODE_INTERCEPT_FE is set: every response between
+ * fe_ring.rsp_cons and the ring's resp_prod is passed to
+ * write_resp_to_fe_ring(), the consumer index is moved up, and the frontend
+ * is kicked if there was anything to consume.  Always returns 0.
+ */
+int blktap_read_fe_ring(void)
+{
+    BLKIF_RING_IDX prod;
+    unsigned long idx;
+    int have_new;
+
+    DPRINTK("blktap_read_fe_ring()\n");
+
+    prod = fe_ring.ring->resp_prod;
+    rmb();
+    have_new = (prod != fe_ring.rsp_cons);
+
+    /* Nothing is forwarded unless we are intercepting the frontend. */
+    if ( !(blktap_mode & BLKTAP_MODE_INTERCEPT_FE) )
+        return 0;
+
+    /* XXX: remap pages on each message as necessary */
+    idx = fe_ring.rsp_cons;
+    while ( idx != prod ) {
+        DPRINTK("resp->fe_ring\n");
+        write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(idx)].resp);
+        idx++;
+    }
+
+    fe_ring.rsp_cons = prod;
+
+    if ( have_new ) {
+        DPRINTK("kick_fe_domain()\n");
+        kick_fe_domain();
+    }
+
+    return 0;
+}
+
+/*
+ * blktap_read_be_ring: consume requests that the application placed on
+ * the user-space be_ring.
+ *
+ * Only acts when BLKTAP_MODE_INTERCEPT_BE is set: every request between
+ * be_ring.req_cons and the ring's req_prod is passed to
+ * write_req_to_be_ring(), the consumer index is moved up, and the backend
+ * is kicked if there was anything to consume.  Always returns 0.
+ */
+int blktap_read_be_ring(void)
+{
+    BLKIF_RING_IDX prod;
+    unsigned long idx;
+    int have_new;
+
+    DPRINTK("blktap_read_be_ring()\n");
+
+    prod = be_ring.ring->req_prod;
+    rmb();
+    have_new = (prod != be_ring.req_cons);
+
+    /* Nothing is forwarded unless we are intercepting the backend. */
+    if ( !(blktap_mode & BLKTAP_MODE_INTERCEPT_BE) )
+        return 0;
+
+    /* XXX: remap pages on each message as necessary */
+    idx = be_ring.req_cons;
+    while ( idx != prod ) {
+        DPRINTK("req->be_ring\n");
+        write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(idx)].req);
+        idx++;
+    }
+
+    be_ring.req_cons = prod;
+
+    if ( have_new ) {
+        DPRINTK("kick_be_domain()\n");
+        kick_be_domain();
+    }
+
+    return 0;
+}
--- /dev/null
+/******************************************************************************
+ * blktap_userdev.c
+ *
+ * XenLinux virtual block-device tap.
+ * Control interface between the driver and a character device.
+ *
+ * Copyright (c) 2004, Andrew Warfield
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/pgalloc.h>
+
+#include "blktap.h"
+
+
+unsigned long blktap_mode = BLKTAP_MODE_PASSTHROUGH; /* current tap mode; replaced by BLKTAP_IOCTL_SETMODE */
+
+/* Only one process may open the blktap device at any time: bit 0 is
+ * claimed with test_and_set_bit() in blktap_open() and cleared again by
+ * blktap_release(). */
+static unsigned long blktap_dev_inuse;
+unsigned long blktap_ring_ok; /* set to 1 by blktap_mmap(), 0 by blktap_release(). make this ring->state */
+
+/* for poll: blktap_poll() waits on this queue, blktap_kick_user() wakes it */
+static wait_queue_head_t blktap_wait;
+
+/* Where things are inside the device mapping. */
+struct vm_area_struct *blktap_vma; /* the vma recorded by blktap_mmap() */
+unsigned long mmap_vstart; /* rings_vstart + (RING_PAGES << PAGE_SHIFT) */
+unsigned long rings_vstart; /* vm_start of the mapped vma */
+
+/* -------[ blktap vm ops ]------------------------------------------- */
+
+/*
+ * blktap_nopage: nopage handler for the blktap mapping.
+ *
+ * A fault reaching this handler means the driver has not mapped anything
+ * at that address, so SIGBUS is forced on the current task and no page is
+ * returned.  The vma, address and type arguments are not used.
+ */
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                  unsigned long address,
+                                  int *type)
+{
+    force_sig(SIGBUS, current);
+
+    return NULL;
+}
+
+/* VM operations installed on the blktap mapping by blktap_mmap(). */
+struct vm_operations_struct blktap_vm_ops = {
+    .nopage = blktap_nopage,
+};
+
+/* -------[ blktap file ops ]----------------------------------------- */
+
+/*
+ * blktap_open: claim the (single-opener) device and allocate the two
+ * user-visible ring pages.
+ *
+ * Returns 0 on success, -EBUSY if the device is already open, or -ENOMEM
+ * if a ring page cannot be allocated.  On failure everything acquired so
+ * far is released again, including the in-use bit, so that a later open()
+ * can succeed.
+ */
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+    if ( test_and_set_bit(0, &blktap_dev_inuse) )
+        return -EBUSY;
+
+    printk(KERN_ALERT "blktap open.\n");
+
+    /* Allocate the fe ring. */
+    fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
+    if ( fe_ring.ring == NULL )
+        goto fail_nomem;
+
+    SetPageReserved(virt_to_page(fe_ring.ring));
+
+    fe_ring.ring->req_prod = fe_ring.ring->resp_prod
+                           = fe_ring.req_prod
+                           = fe_ring.rsp_cons
+                           = 0;
+
+    /* Allocate the be ring. */
+    be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL);
+    if ( be_ring.ring == NULL )
+        goto fail_free_fe;
+
+    SetPageReserved(virt_to_page(be_ring.ring));
+
+    be_ring.ring->req_prod = be_ring.ring->resp_prod
+                           = be_ring.rsp_prod
+                           = be_ring.req_cons
+                           = 0;
+
+    DPRINTK(KERN_ALERT "blktap open.\n");
+
+    return 0;
+
+ fail_free_fe:
+    /* Undo SetPageReserved() first, mirroring blktap_release(). */
+    ClearPageReserved(virt_to_page(fe_ring.ring));
+    free_page((unsigned long)fe_ring.ring);
+
+ fail_nomem:
+    /* Give the device back, otherwise every later open() gets -EBUSY. */
+    clear_bit(0, &blktap_dev_inuse);
+    return -ENOMEM;
+}
+
+/*
+ * blktap_release: tear down the state set up by blktap_open().
+ *
+ * The rings are marked unusable first, then both ring pages are
+ * un-reserved and freed.  The in-use bit is dropped last, so a new opener
+ * cannot install fresh ring pages while the old ones are still being
+ * freed here.  Always returns 0.
+ */
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+    /* blktap_write_{fe,be}_ring() test this before touching the rings. */
+    blktap_ring_ok = 0;
+
+    printk(KERN_ALERT "blktap closed.\n");
+
+    /* Free the ring pages. */
+    ClearPageReserved(virt_to_page(fe_ring.ring));
+    free_page((unsigned long)fe_ring.ring);
+
+    ClearPageReserved(virt_to_page(be_ring.ring));
+    free_page((unsigned long)be_ring.ring);
+
+    /* Only now may another process open the device. */
+    clear_bit(0, &blktap_dev_inuse);
+
+    return 0;
+}
+
+/*
+ * blktap_mmap: map the two ring pages at the start of the caller's vma.
+ *
+ * The vma must span exactly MMAP_PAGES + RING_PAGES pages.  The be_ring
+ * page is mapped at vm_start and the fe_ring page one page above it, both
+ * with a non-cached protection.  The vma and the derived start addresses
+ * are recorded, and the rings declared usable, only after both mappings
+ * succeeded.
+ *
+ * Returns 0 on success, or -EAGAIN if the size is wrong or a ring page
+ * could not be mapped.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+    int size;
+
+    printk(KERN_ALERT "blktap mmap (%lx, %lx)\n",
+           vma->vm_start, vma->vm_end);
+
+    vma->vm_ops = &blktap_vm_ops;
+
+    size = vma->vm_end - vma->vm_start;
+    if ( size != ( (MMAP_PAGES + RING_PAGES) << PAGE_SHIFT ) ) {
+        printk(KERN_INFO
+               "blktap: you _must_ map exactly %d pages!\n",
+               MMAP_PAGES + RING_PAGES);
+        return -EAGAIN;
+    }
+
+    size >>= PAGE_SHIFT;
+    /* NOTE(review): "2 rings" vs. size-1 looks inconsistent -- confirm. */
+    printk(KERN_INFO "blktap: 2 rings + %d pages.\n", size-1);
+
+    /* Map the ring pages to the start of the region and reserve it. */
+
+    /* not sure if I really need to do this... */
+    vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+    DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring));
+    if ( remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE,
+                          vma->vm_page_prot) ) {
+        printk(KERN_ERR "be_ring: remap_page_range failure!\n");
+        return -EAGAIN;
+    }
+
+    DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring));
+    if ( remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring),
+                          PAGE_SIZE, vma->vm_page_prot) ) {
+        printk(KERN_ERR "fe_ring: remap_page_range failure!\n");
+        return -EAGAIN;
+    }
+
+    /*
+     * Publish the mapping only once it is complete: the data path treats
+     * blktap_ring_ok as permission to use blktap_vma and MMAP_VADDR().
+     */
+    rings_vstart = vma->vm_start;
+    mmap_vstart  = rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    blktap_vma = vma;
+    blktap_ring_ok = 1;
+
+    return 0;
+}
+
+/*
+ * blktap_ioctl: control entry point for the blktap device.
+ *
+ *  BLKTAP_IOCTL_KICK_FE  - process responses queued on the user fe_ring;
+ *                          returns the result of blktap_read_fe_ring().
+ *  BLKTAP_IOCTL_KICK_BE  - process requests queued on the user be_ring;
+ *                          returns the result of blktap_read_be_ring().
+ *  BLKTAP_IOCTL_SETMODE  - set blktap_mode to arg; returns 0, or -EINVAL
+ *                          if BLKTAP_MODE_VALID(arg) is false.
+ *
+ * Any other command returns -ENOIOCTLCMD.
+ */
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+    switch ( cmd ) {
+    case BLKTAP_IOCTL_KICK_FE: /* There are fe messages to process. */
+        return blktap_read_fe_ring();
+
+    case BLKTAP_IOCTL_KICK_BE: /* There are be messages to process. */
+        return blktap_read_be_ring();
+
+    case BLKTAP_IOCTL_SETMODE:
+        /* A known command with a bad argument is an invalid argument. */
+        if ( !BLKTAP_MODE_VALID(arg) )
+            return -EINVAL;
+
+        blktap_mode = arg;
+        /* XXX: may need to flush rings here. */
+        printk(KERN_INFO "blktap: set mode to %lx\n", arg);
+        return 0;
+
+    default:
+        break;
+    }
+
+    return -ENOIOCTLCMD;
+}
+
+/*
+ * blktap_poll: poll handler for the blktap device.
+ *
+ * Registers the caller on blktap_wait.  If either private producer index
+ * differs from the one stored in its shared ring page, both shared
+ * indices are brought up to date and the device is reported readable
+ * (POLLIN | POLLRDNORM); otherwise 0 is returned.
+ */
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+    poll_wait(file, &blktap_wait, wait);
+
+    /* Nothing new on either ring since the indices were last published. */
+    if ( (fe_ring.req_prod == fe_ring.ring->req_prod) &&
+         (be_ring.rsp_prod == be_ring.ring->resp_prod) )
+        return 0;
+
+    fe_ring.ring->req_prod  = fe_ring.req_prod;
+    be_ring.ring->resp_prod = be_ring.rsp_prod;
+
+    return POLLIN | POLLRDNORM;
+}
+
+/*
+ * blktap_kick_user: wake any task sleeping on blktap_wait (the queue that
+ * blktap_poll() registers on).  Ring indices are not touched here.
+ */
+void blktap_kick_user(void)
+{
+    wake_up_interruptible(&blktap_wait);
+}
+
+/* File operations exported through the blktap misc device. */
+static struct file_operations blktap_fops = {
+    .owner   = THIS_MODULE,
+    .open    = blktap_open,
+    .release = blktap_release,
+    .mmap    = blktap_mmap,
+    .ioctl   = blktap_ioctl,
+    .poll    = blktap_poll,
+};
+
+/* -------[ blktap module setup ]------------------------------------- */
+
+/* Misc device descriptor registered by blktap_init(). */
+static struct miscdevice blktap_miscdev = {
+    .name       = "blktap",
+    .devfs_name = "misc/blktap",
+    .minor      = BLKTAP_MINOR,
+    .fops       = &blktap_fops,
+};
+
+/*
+ * blktap_init: set up the user-space side of the tap.
+ *
+ * Initialises the poll wait queue and then registers the misc device.
+ * Returns 0 on success or the error code from misc_register().
+ */
+int blktap_init(void)
+{
+    int err;
+
+    /*
+     * The wait queue must be valid before the device is registered: once
+     * misc_register() returns the device can be opened and polled, and
+     * blktap_poll() / blktap_kick_user() both use blktap_wait.
+     */
+    init_waitqueue_head(&blktap_wait);
+
+    err = misc_register(&blktap_miscdev);
+    if ( err != 0 )
+    {
+        printk(KERN_ALERT "Couldn't register /dev/misc/blktap (%d)\n", err);
+        return err;
+    }
+
+    return 0;
+}